library(tidyverse)
library(forcats)
library(dplyr)
library(glue)
library(brms)
library(tidybayes)
library(bayesplot)
library(cowplot)
nCraft_first_round <- 19337
nBennet_first_round <- 13468
Brakey_transfer <- 9542
actual_final_round <- data.frame(
crafts = 22888,
bennett = 16207)
actual_transfer_perc <- data.frame(
crafts = 3551/Brakey_transfer,
bennett = 2739/Brakey_transfer
)
# given the probabilities of each option and the number of ballots to redistribute, return the number of ballots redistributed to Bennett and Craft.
multinomial_p <- function(pBennet, pCraft, pUndecided, n){
r <- rmultinom(1, n, prob = c(pBennet, pCraft, pUndecided))
return(tibble(nBennet = r[1, 1], nCraft = r[2, 1]))
}
mnl_predict <- function(samples){
samples %>%
mutate(pBennet = exp(b_muBennet_Intercept)/(1 + exp(b_muBennet_Intercept) + exp(b_muCrafts_Intercept)),
pCraft = exp(b_muCrafts_Intercept)/(1 + exp(b_muBennet_Intercept) + exp(b_muCrafts_Intercept)),
pUndecided = 1/(1 + exp(b_muBennet_Intercept) + exp(b_muCrafts_Intercept)),
multinom_dist = pmap(list(pBennet, pCraft, pUndecided, Brakey_transfer), multinomial_p)) %>%
unnest(multinom_dist) %>%
mutate(predCraft_final_round = nCraft_first_round + nCraft,
predBennet_final_round = nBennet_first_round + nBennet,
predCraft_final_round_perc = predCraft_final_round/(predCraft_final_round + predBennet_final_round),
predBennet_final_round_perc = predBennet_final_round/(predBennet_final_round + predCraft_final_round),
craft_minus_bennet = predCraft_final_round - predBennet_final_round,
craft_win = craft_minus_bennet > 0)
}
We know Brakey will be eliminated, so filter rows to include only those have Brakey marked first. Treat non-response for rank2 as equivalent to “Undecided”.
cvr_df<- read_csv("data/july2020_maine_house/cvr.csv")
candidate_codes_df <- read_csv("data/july2020_maine_house/candidate_codes.csv")
# filter only rows with first choice == 2
cvr_subset_df <-
cvr_df %>%
filter(rank1 == 2) %>%
replace_na(list(rank2 = 4)) %>%
mutate(rank1 = recode_factor(rank1, `1` = "Bennet", `2` = "Brakey", `3` = "Crafts", `4` = "Undecided"),
rank2 = recode_factor(rank2, `1` = "Bennet", `2` = "Brakey", `3` = "Crafts", `4` = "Undecided")) %>%
select(first_choice = rank1, final_choice = rank2, weight)
head(cvr_subset_df, 20)
## # A tibble: 20 x 3
## first_choice final_choice weight
## <fct> <fct> <dbl>
## 1 Brakey Crafts 1.04
## 2 Brakey Crafts 0.808
## 3 Brakey Bennet 0.900
## 4 Brakey Undecided 0.948
## 5 Brakey Bennet 0.900
## 6 Brakey Undecided 0.808
## 7 Brakey Undecided 0.583
## 8 Brakey Bennet 1.34
## 9 Brakey Bennet 1.32
## 10 Brakey Crafts 1.04
## 11 Brakey Bennet 1.13
## 12 Brakey Bennet 1.09
## 13 Brakey Crafts 0.895
## 14 Brakey Undecided 1.32
## 15 Brakey Crafts 1.35
## 16 Brakey Crafts 0.583
## 17 Brakey Undecided 0.948
## 18 Brakey Undecided 0.900
## 19 Brakey Bennet 0.583
## 20 Brakey Crafts 1.35
cvr_subset_df %>%
group_by(final_choice) %>%
summarize(weighted_n = sum(weight)) %>%
mutate(weighted_perc = weighted_n/sum(weighted_n))
## # A tibble: 3 x 3
## final_choice weighted_n weighted_perc
## <fct> <dbl> <dbl>
## 1 Bennet 37.7 0.334
## 2 Crafts 45.0 0.399
## 3 Undecided 30.1 0.267
# store results for later plotting
poll_mle <- data.frame(crafts = 0.399, bennett = 0.333)
Use a multinomial logistic model. Assume the choices are distributed by a multinomial likelihood.
\[ rank2_i \sim Multinomial(n=1, \pi_{Crafts}, \pi_{Bennett}, \pi_{Undecided}) \\ \pi_{Crafts} = P(rank2 = Crafts)\\ \pi_{Bennett} = P(rank2 = Bennett)\\ \pi_{Undecided} = P(rank2 = Undecided) \]
The goal is to estimate the choice probabilities. In this case, because the model is simple (only one candidate being eliminated), a Dirichlet prior could be placed on the 3 choice probabilities.
\[ (\pi_{Crafts}, \pi_{Bennett}, \pi_{Undecided}) \sim Dirichlet(\alpha_1, \alpha_2, \alpha_3) \\ \alpha_1, \alpha_2, \alpha_3 \sim distribution() \] An alternative, which maintains more interpretable parameters as the regression becomes more complex, is to estimate the choice probabilities via their log odds ratios, using “Undecided” as a reference category.
\[ \displaystyle \log(\frac{P(Y = Crafts)}{P(Y = Undecided)}) = \alpha_{Crafts} \\ \displaystyle \log(\frac{P(Y = Bennett)}{P(Y = Undecided)}) = \alpha_{Bennett} \]
(Since we are only redistributing Brakey’s votes in this example, there are no other predictors in the regression equation. Just an intercept. If we were redistributing ballots from two candidates, those two candidates could be included as predictors in addition to the intercepts.)
The log odds ratios can be converted into choice probabilities for use in the likelihood function:
\[ P(Y = Crafts) = \frac{e^{a_{Crafts}}}{e^{a_{Crafts}} + e^{a_{Bennett}} + 1} \\ P(Y = Bennett) = \frac{e^{a_{Bennett}}}{e^{a_{Crafts}} + e^{a_{Bennett}} + 1} \\ P(Y = Undecided) = \frac{1}{e^{a_{Crafts}} + e^{a_{Bennett}} + 1} \] All that’s left is a prior on each parameter. Aim for priors that lead the model to predict, prior to seeing any data, that Brakey’s ballots get redistributed evenly between Crafts, Bennett, and Undecided.
\[ \alpha_{Crafts} \sim Normal(0, 1.25) \\ \alpha_{Bennett} \sim Normal(0, 1.25) \]
Craft received 19,337 votes in the first round.
Bennet received 13,468 votes in the first round.
Brakey received 9,542 votes in the first round.
Simulate the re-distribution of Brakey’s ballots using samples from the prior
For each sample of prior values for the intercepts.
For each draw, convert the two intercepts into the three probabilities (Craft, Bennet, and Undecided/Exhaust).
Simulate the redistribution of Brakey’s votes.
Add the simulated redistribution to Craft’s and Bennet’s first round total to get an estimate of their final round totals.
\[ \alpha_{Crafts} \sim Normal(0, 10) \\ \alpha_{Bennett} \sim Normal(0, 10) \]
pr <- prior(normal(0, 10), dpar = "muBennet", class = "Intercept") +
prior(normal(0, 10), dpar = "muCrafts", class = "Intercept")
fit <- brm(formula = final_choice|weights(weight) ~ 1,
data = cvr_subset_df,
family = categorical(refcat = "Undecided"),
prior = pr, sample_prior = "only",
chains = 4, iter = 2000,
file = "intercept_only_badprior")
prior_samples <- spread_draws(fit, b_muBennet_Intercept, b_muCrafts_Intercept)
prior_samples_pred <- mnl_predict(prior_samples)
posterior_summary(fit)
## Estimate Est.Error Q2.5 Q97.5
## b_muBennet_Intercept -0.05594890 9.905799 -19.49533 19.04813
## b_muCrafts_Intercept -0.04455137 9.976012 -19.33370 19.35787
## lp__ -7.43105413 1.005720 -10.16101 -6.46952
ggplot(prior_samples_pred) +
geom_point(aes(x = b_muCrafts_Intercept, y = b_muBennet_Intercept), alpha = 0.4) + theme_light()
ggplot(prior_samples_pred) +
geom_point(aes(x = pCraft, y = pBennet), alpha = 0.4) +
xlab("probability of Brakey -> Craft") +
ylab("probability of Brakey -> Bennett") +
labs(subtitle = "Prior distribution of transfer probabilities to Crafts and Bennett") +
theme_light()
ggplot(prior_samples_pred) +
geom_point(aes(x = predCraft_final_round, y = predBennet_final_round), alpha = 0.4) +
geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
theme_light() +
labs(subtitle = "Predicitive distribution for Crafts and Bennet final round counts.") +
xlab("predicted final count - Crafts") +
ylab("Bennett") +
theme(legend.position = "none")
crafts <-
ggplot(prior_samples_pred) +
geom_histogram(aes(x = predCraft_final_round_perc), binwidth = 0.01, color="black", fill="lightblue") +
theme_light() +
labs(subtitle = "Predicitive distribution for Crafts final round percent.") +
xlab("predicted final percent - Crafts") +
xlim(0, 1)
bennett <-
ggplot(prior_samples_pred) +
geom_histogram(aes(x = predBennet_final_round_perc), binwidth = 0.01, color="black", fill="lightblue") +
theme_light() +
labs(subtitle = "Predicitive distribution for Bennett final round percent.") +
xlab("predicted final percent - Bennett") +
xlim(0, 1)
plot_grid(crafts, bennett, ncol = 1)
ggplot(prior_samples_pred) +
geom_histogram(aes(x = craft_minus_bennet), binwidth = 50) +
xlab("Crafts minus Bennett") +
labs(subtitle = "Predictive distribution of differences in final round counts.") +
theme_light()
Summary statistics:
prior_samples_pred %>%
summarise(mean_craft = mean(predCraft_final_round),
lower95_craft = quantile(predCraft_final_round, 0.025),
upper95_craft = quantile(predCraft_final_round, 0.975),
mean_bennett = mean(predBennet_final_round),
lower95_bennett = quantile(predBennet_final_round, 0.025),
upper95_bennett = quantile(predBennet_final_round, 0.975),
prob_craft_win = mean(craft_win)) %>%
pivot_longer(everything(), names_to = "measurement")
## # A tibble: 7 x 2
## measurement value
## <chr> <dbl>
## 1 mean_craft 22906.
## 2 lower95_craft 19337
## 3 upper95_craft 28879
## 4 mean_bennett 17023.
## 5 lower95_bennett 13468
## 6 upper95_bennett 23010
## 7 prob_craft_win 0.658
prior_samples_pred %>%
summarise(mean_pCraft = mean(pCraft),
lower95_pCraft = quantile(pCraft, probs = 0.025),
upper95_pCraft = quantile(pCraft, probs = 0.975),
mean_pBennet = mean(pBennet),
lower95_pBennet = quantile(pBennet, probs = 0.025),
upper95_pBennet = quantile(pCraft, probs = 0.975)) %>%
pivot_longer(everything(), names_to = "measurement") %>%
mutate_if(is.numeric, round, 2)
## # A tibble: 6 x 2
## measurement value
## <chr> <dbl>
## 1 mean_pCraft 0.37
## 2 lower95_pCraft 0
## 3 upper95_pCraft 1
## 4 mean_pBennet 0.37
## 5 lower95_pBennet 0
## 6 upper95_pBennet 1
\[ \alpha_{Crafts} \sim Normal(0, 1.25) \\ \alpha_{Bennett} \sim Normal(0, 1.25) \]
pr <- prior(normal(0, 1.25), dpar = "muBennet", class = "Intercept") +
prior(normal(0, 1.25), dpar = "muCrafts", class = "Intercept")
fit <- brm(formula = final_choice|weights(weight) ~ 1,
data = cvr_subset_df,
family = categorical(refcat = "Undecided"),
prior = pr, sample_prior = "only",
chains = 4, iter = 2000,
file = "intercept_only_betterprior")
prior_samples <- spread_draws(fit, b_muBennet_Intercept, b_muCrafts_Intercept)
prior_samples_pred <- mnl_predict(prior_samples)
posterior_summary(fit)
## Estimate Est.Error Q2.5 Q97.5
## b_muBennet_Intercept -0.01003348 1.263264 -2.505414 2.551273
## b_muCrafts_Intercept 0.01984493 1.265194 -2.412689 2.603372
## lp__ -3.30696363 1.016384 -6.183761 -2.310923
ggplot(prior_samples_pred) +
geom_point(aes(x = b_muCrafts_Intercept, y = b_muBennet_Intercept), alpha = 0.4) + theme_light()
ggplot(prior_samples_pred) +
geom_point(aes(x = pCraft, y = pBennet), alpha = 0.4) +
xlab("probability of Brakey -> Craft") +
ylab("probability of Brakey -> Bennett") +
labs(subtitle = "Prior distribution of transfer probabilities to Crafts and Bennett") +
theme_light()
ggplot(prior_samples_pred) +
geom_point(aes(x = predCraft_final_round, y = predBennet_final_round), alpha = 0.4) +
geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
theme_light() +
labs(subtitle = "Predicitive distribution for Crafts and Bennet final round counts.") +
xlab("predicted final count - Crafts") +
ylab("Bennett") +
theme(legend.position = "none")
crafts <-
ggplot(prior_samples_pred) +
geom_histogram(aes(x = predCraft_final_round_perc), binwidth = 0.01, color="black", fill="lightblue") +
theme_light() +
labs(subtitle = "Predicitive distribution for Crafts final round percent.") +
xlab("predicted final percent - Crafts") +
xlim(0, 1)
bennett <-
ggplot(prior_samples_pred) +
geom_histogram(aes(x = predBennet_final_round_perc), binwidth = 0.01, color="black", fill="lightblue") +
theme_light() +
labs(subtitle = "Predicitive distribution for Bennett final round percent.") +
xlab("predicted final percent - Bennett") +
xlim(0, 1)
plot_grid(crafts, bennett, ncol = 1)
## Warning: Removed 2 rows containing missing values (geom_bar).
## Warning: Removed 2 rows containing missing values (geom_bar).
ggplot(prior_samples_pred) +
geom_histogram(aes(x = craft_minus_bennet), binwidth = 30) +
xlab("Crafts minus Bennett") +
labs(subtitle = "Predictive distribution of differences in final round counts.") +
theme_light()
Some statisics:
prior_samples_pred %>%
summarise(mean_craft = mean(predCraft_final_round),
lower95_craft = quantile(predCraft_final_round, 0.025),
upper95_craft = quantile(predCraft_final_round, 0.975),
mean_bennett = mean(predBennet_final_round),
lower95_bennett = quantile(predBennet_final_round, 0.025),
upper95_bennett = quantile(predBennet_final_round, 0.975),
prob_craft_win = mean(craft_win)) %>%
pivot_longer(everything(), names_to = "measurement")
## # A tibble: 7 x 2
## measurement value
## <chr> <dbl>
## 1 mean_craft 22706.
## 2 lower95_craft 19554.
## 3 upper95_craft 27607.
## 4 mean_bennett 16775.
## 5 lower95_bennett 13682.
## 6 upper95_bennett 21667.
## 7 prob_craft_win 0.9
prior_samples_pred %>%
summarise(mean_pCraft = mean(pCraft),
lower95_pCraft = quantile(pCraft, probs = 0.025),
upper95_pCraft = quantile(pCraft, probs = 0.975),
mean_pBennet = mean(pBennet),
lower95_pBennet = quantile(pBennet, probs = 0.025),
upper95_pBennet = quantile(pCraft, probs = 0.975)) %>%
pivot_longer(everything(), names_to = "measurement") %>%
mutate_if(is.numeric, round, 2)
## # A tibble: 6 x 2
## measurement value
## <chr> <dbl>
## 1 mean_pCraft 0.35
## 2 lower95_pCraft 0.02
## 3 upper95_pCraft 0.87
## 4 mean_pBennet 0.35
## 5 lower95_pBennet 0.02
## 6 upper95_pBennet 0.87
pr <- prior(normal(0, 1.25), dpar = "muBennet", class = "Intercept") +
prior(normal(0, 1.25), dpar = "muCrafts", class = "Intercept")
fit <- brm(formula = final_choice|weights(weight) ~ 1,
data = cvr_subset_df,
family = categorical(refcat = "Undecided"),
prior = pr, sample_prior = "no",
chains = 4, iter = 2000,
file = "intercept_only")
post_samples <- spread_draws(fit, b_muBennet_Intercept, b_muCrafts_Intercept)
post_samples_pred <- mnl_predict(post_samples)
write_csv(post_samples_pred, "posterior_predictions.csv")
posterior_summary(fit)
## Estimate Est.Error Q2.5 Q97.5
## b_muBennet_Intercept 0.2160087 0.2428377 -0.25505958 0.683251
## b_muCrafts_Intercept 0.3924274 0.2311423 -0.04979993 0.847832
## lp__ -125.8704200 1.0149442 -128.50380800 -124.900336
Craft received 19,337 votes in the first round.
Bennett received 13,468 votes in the first round.
Brakey received 9,542 votes in the first round.
Simulate the re-distribution of Brakey’s ballots using samples from the posterior.
For each sample of posterior values for the intercepts.
For each draw, convert the two intercepts into the three probabilities (Craft, Bennet, and Undecided/Exhaust).
Simulate the redistribution of Brakey’s votes.
Add the simulated redistribution to Craft’s and Bennet’s first round total to get an estimate of their final round totals.
ggplot(post_samples_pred) +
geom_point(aes(x = b_muCrafts_Intercept, y = b_muBennet_Intercept), alpha = 0.4) + theme_light()
ggplot(post_samples_pred) +
geom_point(aes(x = pCraft, y = pBennet), alpha = 0.4) +
geom_point(data = actual_transfer_perc, aes(x = crafts, y = bennett), color = "red", size = 2) +
geom_point(data = prior_samples_pred, aes(x = pCraft, y = pBennet), color = "blue", alpha = 0.01) +
geom_point(data = poll_mle, aes(x = crafts, y = bennett), color = "green", size = 2) +
xlim(0,1) +
ylim(0,1) +
xlab("probability of Brakey -> Craft") +
ylab("probability of Brakey -> Bennett") +
labs(subtitle = "Posterior distribution of transfer probabilities to Crafts and Bennett.\nRed dot is actual Brakey tranfer probability.\nGreen dot is MLE from poll.\nBlue points indicate prior distribution.") +
theme_light()
ggplot(post_samples_pred) +
geom_point(aes(x = predCraft_final_round, y = predBennet_final_round), alpha = 0.4) +
geom_point(data = prior_samples_pred, aes(x = predCraft_final_round, y = predBennet_final_round), alpha = 0.01, color = "blue") +
geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
theme_light() +
geom_point(data = actual_final_round, aes(x = crafts, y = bennett), color = "red", size = 2) +
xlim(nCraft_first_round, nCraft_first_round + Brakey_transfer) +
ylim(nBennet_first_round, nBennet_first_round + Brakey_transfer) +
labs(subtitle = "Predicitive distribution for Crafts and Bennet final round counts.\nRed dot is actual final round count.\nLight blue points indicate prior distribution.") +
xlab("predicted final count - Crafts") +
ylab("Bennett") +
theme(legend.position = "none")
crafts <-
ggplot(post_samples_pred) +
geom_histogram(aes(x = predCraft_final_round_perc), binwidth = 0.01, color="black", fill="lightblue") +
theme_light() +
labs(subtitle = "Predicitive distribution for Crafts final round percent.") +
xlab("posterior predicted final percent - Crafts") +
xlim(0, 1)
bennett <-
ggplot(post_samples_pred) +
geom_histogram(aes(x = predBennet_final_round_perc), binwidth = 0.01, color="black", fill="lightblue") +
theme_light() +
labs(subtitle = "Predicitive distribution for Bennett final round percent.") +
xlab("posterior predicted final percent - Bennett") +
xlim(0, 1)
plot_grid(crafts, bennett, ncol = 1)
ggplot(post_samples_pred) +
geom_histogram(aes(x = craft_minus_bennet), binwidth = 30) +
geom_vline(xintercept = actual_final_round$crafts - actual_final_round$bennett, color = "red") +
xlab("Crafts minus Bennett") +
labs(subtitle = "Predictive distribution of differences in final round counts.\n Red line is actual difference.") +
theme_light()
Summary statistics:
post_samples_pred %>%
summarise(mean_craft = mean(predCraft_final_round),
lower95_craft = quantile(predCraft_final_round, 0.025),
upper95_craft = quantile(predCraft_final_round, 0.975),
mean_bennett = mean(predBennet_final_round),
lower95_bennett = quantile(predBennet_final_round, 0.025),
upper95_bennett = quantile(predBennet_final_round, 0.975),
prob_craft_win = mean(craft_win)) %>%
pivot_longer(everything(), names_to = "measurement")
## # A tibble: 7 x 2
## measurement value
## <chr> <dbl>
## 1 mean_craft 23124.
## 2 lower95_craft 22321.
## 3 upper95_craft 23982.
## 4 mean_bennett 16651.
## 5 lower95_bennett 15878.
## 6 upper95_bennett 17481.
## 7 prob_craft_win 1
post_samples_pred %>%
summarise(mean_pCraft = mean(pCraft),
lower95_pCraft = quantile(pCraft, probs = 0.025),
upper95_pCraft = quantile(pCraft, probs = 0.975),
mean_pBennet = mean(pBennet),
lower95_pBennet = quantile(pBennet, probs = 0.025),
upper95_pBennet = quantile(pCraft, probs = 0.975)) %>%
pivot_longer(everything(), names_to = "measurement") %>%
mutate_if(is.numeric, round, 2)
## # A tibble: 6 x 2
## measurement value
## <chr> <dbl>
## 1 mean_pCraft 0.4
## 2 lower95_pCraft 0.31
## 3 upper95_pCraft 0.49
## 4 mean_pBennet 0.33
## 5 lower95_pBennet 0.25
## 6 upper95_pBennet 0.49